---
title: "IV Validity: Determinants of School Numbers"
author: "Andrei Munteanu"
date: "05/10/2021"
output:
  html_document:
    fig_width: 8 
    fig_height: 8 
---
<style type="text/css">
.main-container {
  max-width: 1800px;
  margin-left: auto;
  margin-right: auto;
}
</style>
```{r options, include=FALSE, echo=FALSE, cache=FALSE}

knitr::opts_knit$set(root.dir=getwd())
knitr::opts_chunk$set(cache.lazy=FALSE,cache=TRUE,warning=FALSE,echo=TRUE,comment='',prompt=T)
options(modelsummary_format_numeric_latex='plain')
knitr::opts_template$set(
  regular=list(include=TRUE,echo=TRUE,cache=FALSE),
  regular_cache=list(include=TRUE,echo=TRUE,cache=TRUE),
  invisible=list(include=FALSE,echo=FALSE,cache=FALSE,warning=FALSE,results=FALSE,message=FALSE),
  muted=list(include=TRUE,echo=TRUE,cache=FALSE,warning=FALSE,results=FALSE,message=FALSE),
  latex=list(include=TRUE,echo=TRUE,cache=FALSE))

```

## {.tabset}
### Determinants of Numbers of Schools
First, notice that the correlation between the population in 1992 and the number of schools in towns in 2019 is higher than the 2019 pop and the 2019 number of schools.
```{r, opts.label='regular'}
with(data_town %>% filter(an==2019),cor(pop_1992,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1993,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1994,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1995,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1996,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1997,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1998,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_1999,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2000,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2001,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2002,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2003,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2004,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2005,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2006,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2007,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2008,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2009,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2010,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2011,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2012,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2013,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2014,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2015,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2016,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2017,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2018,n_hs_town,use="complete.obs"))
with(data_town %>% filter(an==2019),cor(pop_2019,n_hs_town,use="complete.obs"))
```
Moreover, the change in pop bw 2008 and 2019 does not predict the change in schools at all
```{r, opts.label='regular'}
data_change<-data_town %>% group_by(judet_bac,town_hs_bac) %>% summarise(dif_schools=n_hs_town[an==2019]-n_hs_town[an==2008],
                                                                         diff_pop=(mean(pop_2019,na.rm=T)-mean(pop_2008,na.rm=T))/1000,
                                                                         pop_2008=mean(pop_2008,na.rm=T)/1000)

model_change1<-feols(dif_schools~
                  diff_pop|
               judet_bac,
             cluster=~judet_bac,
             data=data_change)
summary(model_change1)

model_change2<-feols(dif_schools~
                     pop_2008+
                  diff_pop|
               judet_bac,
             cluster=~judet_bac,
             data=data_change)
summary(model_change2)
```

```{r, opts.label='regular'}
model_1<-feols(as.numeric(n_hs_town)~
                  I(pop_1992/1000)|
               judet_bac,
             cluster=~judet_bac,
             data=data_town2)
summary(model_1)

```
Second, 1992 population explains 97% of the variation in the number of schools across towns.
```{r, opts.label='regular'}
model_2<-feols(as.numeric(n_hs_town)~
               I(pop_1992/1000)+
                  I((pop_2019-pop_1992)/1000)|
               judet_bac,
             cluster=~judet_bac,
             data=data_town2)
summary(model_2)
```
Third, once we control for student pop and 1992 pop, Wages, unemployment levels, hs dropout rates and pop change between 2008-2019 and 1992 does not significantly predict number of high schools.
```{r, opts.label='regular'}
model_3<-feols(as.numeric(n_hs_town)~
               I(n_students_town_yr/1000)+
               #I((n_students_town_yr/1000)^2)+
               #I((n_students_town_yr/1000)^3)+
               #Wages_hs_bac+
               I(pop_1992/1000)+
               #I((pop_1992/1000)^2)+
               #I((`1992`/10000)^3)+
               Wages_hs_bac+
               Unemployment_hs_bac+
               I(drop_hs_hs_bac/100)+
               I((pop_2019-pop_1992)/1000),
               #I((pop_2008_2019-pop_1992)^2/1000),
               #I(Wages_hs_bac^2)+
               #I(Wages_hs_bac^3)+
               #Unemployment_hs_bac+
               #I(Unemployment_hs_bac^2)+
               #I(Unemployment_hs_bac^3)+
               #I(drop_hs_hs_bac/100)+
               #I(drop_hs_hs_bac/100)+
               #I(drop_hs_hs_bac^2)+
               #I(drop_hs_hs_bac^3)+
               #judet_bac,
             cluster=~judet_bac,
             data=data_town2)
summary(model_3)
```


```{r, opts.label='latex'}
print(modelsummary(list(model_3), statistic=NULL,output="latex",estimate="{estimate}{stars}",stars=c('^{*}'=0.1,'^{**}'=0.05,'^{***}'=0.01)))
```
Last, notice that the depopulation between 1992 and 2008-2019 is large and heterogenous (-4% mean, 17% standard dev):
```{r, opts.label='regular'}
hist((data_town$pop_2008_2019-data_town$pop_1992)/data_town$pop_1992)
mean((data_town$pop_2008_2019-data_town$pop_1992)/data_town$pop_1992,na.rm=T)
sd((data_town$pop_2008_2019-data_town$pop_1992)/data_town$pop_1992,na.rm=T)
```
### Latex
```{r, opts.label='regular'}
variables<-c('Timing'='Year',entrance_perc='Admission Score')

options(modelsummary_format_numeric_latex = "plain")
print(modelsummary(list("1"=model_change1,
                        "2"=model_change2,
                         "3"=model_1,
                  "4"=model_2,
                  "5"=model_3),
             statistic = "std.error",
             estimate="{estimate}{stars}",
             output='latex',
             stars=c('^{*}'=0.1,'^{**}'=0.05,'^{***}'=0.01),
             title="Difference-in-Differences Estimate of High School Openings",
             gof_omit = 'AIC|R2 Pseudo|R2 Adj|R2 Within|BIC|Log.Lik.|Sigma',
             coef_omit='nothing',
             coef_rename=variables,
             escape=F))

r2(model_change1)
r2(model_change2)
r2(model_1)
r2(model_2)
r2(model_3)
```


```{r, opts.label='regular'}
g
```

